*clear
*insheet using "$sourcedatadir/se/swedeninflation.csv", names
// For clarity, copy nygdpdeflkdzg (the World Bank series code) to a variable named inflation.
*gen inflation = nygdpdeflkdzg
// Convert the annual inflation rate series into a GDP deflator series
*gen gdpdeflator = 1 if _n == 1
*replace gdpdeflator = gdpdeflator[_n-1] * (1 + (inflation/100)) if _n > 1
// Rescale so that 2010 is the base year instead of the first year in the series (1963).
*gen rescale = gdpdeflator if year == 2010
*egen rescale2 = max(rescale)
*replace gdpdeflator = 100*gdpdeflator/rescale2
*drop rescale* source inflation nygdpdeflkdzg
*save "$gendatadir/swedendeflator.dta", replace

// Build the price index used for deflation from the Swedish CPI file.
// The index is stored in a variable named gdpdeflator (even though it is
// derived from cpi here) and saved as swedendeflator.dta, which is the
// file merged on year further down in this script.
clear
insheet using "$sourcedatadir/se/swedencpi.csv", names
// Remove embedded spaces from cpi and convert it to a numeric variable.
destring cpi, replace ignore(" ")
// Rescale so that 2010 is the base year (index value 100).
// rescale holds cpi in rows where year == 2010 and is missing elsewhere;
// egen max() then copies that single value to every row as rescale2.
// NOTE(review): if the file has no row with year == 2010, rescale2 is missing
// and gdpdeflator becomes missing in every row.
gen rescale = cpi if year == 2010
egen rescale2 = max(rescale)
gen gdpdeflator = cpi
replace gdpdeflator = 100*gdpdeflator/rescale2
drop rescale* cpi
save "$gendatadir/swedendeflator.dta", replace

// Import the incumbents CSV, sort it by year, and save it as a .dta file;
// it is merged on year into the combined dataset near the end of this script.
clear
insheet using "$sourcedatadir/se/sweden incumbents.csv"
sort year
save "$gendatadir/sweden incumbents.dta", replace


// Import the disposable-income-by-decile CSV (1975-2011, wide layout) and derive
// quintile-level series with the _scb suffix, then save the result for the
// final merge on year.
clear
insheet using "$sourcedatadir/se/Disposable income by deciles 1975-2011_wide.csv", names

// Tag each row: rows with an empty varcat become "level", every other row
// becomes "error". The reshape then yields one row per year, with each
// measure split into a ...level and a ...error variable.
replace varcat = "level" if varcat == ""
replace varcat = "error" if varcat != "level"
reshape wide decile* top5mean pctile95threshold , i(year) j(varcat) string

// Each quintile value is the average of its two decile level values.
// NOTE(review): this is a quintile mean only if decile*level are decile means -
// confirm against the source CSV.
gen inc_m_quin1_scb = (decile1level + decile2level) / 2
gen inc_m_quin2_scb = (decile3level + decile4level) / 2
gen inc_m_quin3_scb = (decile5level + decile6level) / 2
gen inc_m_quin4_scb = (decile7level + decile8level) / 2

// Average of the bottom two quintiles. The parentheses are required: without
// them "/" binds tighter than "+" and the result is quin1 + quin2/2.
gen inc_m_quin12_scb = (inc_m_quin1_scb + inc_m_quin2_scb) / 2

gen inc_ll_top5_scb = pctile95thresholdlevel
gen inc_m_top5_scb = top5meanlevel

// Overall mean as the average of the five quintile values (the fifth is
// computed inline from deciles 9 and 10).
gen inc_mean_scb  = (inc_m_quin1_scb + inc_m_quin2_scb + inc_m_quin3_scb + inc_m_quin4_scb + (decile9level + decile10levelel)/2) /5

// Top-5% income share in percent: population weight (0.05) times the top-5%
// mean, relative to the overall mean.
gen inc_share_top5 = 100*(top5meanlevel * 0.05)/inc_mean_scb
*drop decile* pctile* top5*
sort year
save "$gendatadir/swedenincomes_19752011_processed.dta", replace


// Convert the extra SCB data CSV to a .dta file so it can be merged on year
// into the combined dataset later in this script.
clear
insheet using "$sourcedatadir/se/scb_extra_data_20150803.csv", names
save "$gendatadir/scb_extra_data_20150803.dta", replace


// Load the processed income file (swedenincomes_19542010_processed.dta, which is
// not created in this part of the script) and attach, by year, the deflator and
// the extra SCB data. nogenerate keeps merge from creating the _merge marker,
// so no separate drop is needed afterwards.
use "$gendatadir/swedenincomes_19542010_processed.dta", clear
merge 1:1 year using "$gendatadir/swedendeflator.dta", nogenerate
merge 1:1 year using "$gendatadir/scb_extra_data_20150803.dta", nogenerate

sort year

// Create the *_yb series: each one is the corresponding income variable divided
// by (gdpdeflator/100). The former "if year < 1976" restriction is disabled, so
// every year is deflated.

// Upper bounds of quintiles 1-4: copy first, then overwrite with the deflated value.
forvalues q = 1/4 {
    gen inc_ul_quin`q'_yb = inc_ul_quin`q'
}
rename inc_ll_95 inc_ll_95_yb
gen inc_mean_yb = inc_mean

forvalues q = 1/4 {
    replace inc_ul_quin`q'_yb = inc_ul_quin`q'/(gdpdeflator/100)
}
// These two are deflated in place from their own current values.
foreach v of varlist inc_ll_95_yb inc_mean_yb {
    replace `v' = `v'/(gdpdeflator/100)
}
*replace inc_50       = inc_50/(gdpdeflator/100)  if year < 1976

// The undeflated originals are no longer needed.
drop inc_ul_quin1 inc_ul_quin2 inc_ul_quin3 inc_ul_quin4 inc_mean


// The next problem is to generate measures for top incomes as we don't have that for the 95th percentile.
// Instead, we have the top quintile income *share*, overall mean income, and the upper income bounds for the bottom 4 quintiles.
// From this, we can produce a measure of mean income for the entire top quintile without making distributional assumptions.
// So, with underscores indicating quintile indexes:
//
//   share_5 = 100 * mean_5 / (5 * mean)
//
//   => mean_5 = (5 * share_5 * mean) / 100

*gen inc_mean_quin5 = (5 * inc_share_quin5 * inc_mean) / 100

// Attach the incumbents data and the 1975-2011 SCB decile-based series by year,
// then write the combined dataset. nogenerate keeps merge from creating the
// _merge marker, so no separate drop is needed afterwards.
merge 1:1 year using "$gendatadir/sweden incumbents.dta", nogenerate
merge 1:1 year using "$gendatadir/swedenincomes_19752011_processed.dta", nogenerate

save "$gendatadir/swedenincomeprocessed.dta", replace
